JuliaStats: Statistics and Machine Learning made easy in Julia.
In [29]:
# Pkg.add("DataFrames")
In [30]:
using DataFrames # DataFrames to represent tabular datasets
# Database-style joins and indexing
# Split-apply-combine operations, reshape and pivoting
# Formula and model frames
In [31]:
# Preview the first lines of the raw CSV by running the external `head` command
# (requires a Unix-like `head` executable on the PATH).
run(`head data/iris.csv`)
In [32]:
# Load data/iris.csv into `iris`, the table that the later cells describe,
# plot, model and cluster.
iris = readtable("data/iris.csv")
Out[32]:
Descripción estadística del dataset (por columnas), similar a `summary` de R.
In [33]:
# Statistical description of the columns of `iris`
# (the notebook text likens it to R's `summary`).
describe(iris)
In [34]:
using Gadfly # Similar to R's ggplot2
In [35]:
# Box plot of PetalLength for each Species, colored by Species.
# Columns are referenced by Symbol, the same convention the scatter-plot
# cell of this notebook uses, instead of by string.
plot(iris, x=:Species, y=:PetalLength, color=:Species, Geom.boxplot)
Out[35]:
In [36]:
# Histogram of PetalLength, colored by Species.
# Columns are referenced by Symbol, the same convention the scatter-plot
# cell of this notebook uses, instead of by string.
plot(iris, x=:PetalLength, color=:Species, Geom.histogram)
Out[36]:
In [37]:
# Scatter plot of PetalWidth against PetalLength, colored by Species,
# with a `Geom.smooth(method=:lm)` layer drawn alongside the points.
plot(
    iris, Geom.point, Geom.smooth(method=:lm);
    x=:PetalLength, y=:PetalWidth, color=:Species
)
Out[37]:
In [38]:
# Pkg.add("GLM")
In [39]:
using GLM # Generalized linear models
# Fit a LinearModel for the formula PetalWidth ~ PetalLength on the iris table.
linear = fit(LinearModel, PetalWidth ~ PetalLength, iris) # For comparison, R gives PetalLength: 0.4157554
Out[39]:
In [40]:
using Clustering
In [41]:
# Run k-means with 3 clusters on the two petal measurements.
# The selected columns are converted to a Float64 matrix and then transposed
# with `'` before being handed to `kmeans`.
cl = let petal_matrix = convert(Matrix{Float64}, iris[:, [:PetalWidth, :PetalLength]])
    kmeans(petal_matrix', 3)
end
Out[41]:
In [42]:
# Show the `centers` field of the k-means result `cl`.
# NOTE(review): presumably one column per cluster, with rows in the order
# PetalWidth, PetalLength (the column order passed to kmeans) — confirm.
cl.centers
Out[42]:
In [43]:
# Per-species means of PetalWidth and PetalLength
# (same column order as the matrix given to k-means).
by(iris, :Species) do group
    (mean(group[:PetalWidth]), mean(group[:PetalLength]))
end
Out[43]: